############################################################ ########### #
#
# Project: Stability and change in the opinion-policy relationship
# 
# This script executes all analyses and produces all results, figs etc
# 
# 2022.09.30. 
############################################################ ########### #


library(patchwork)
library(ggridges)
library(rstanarm)
library(bayesplot)
library(stargazer)
library(patchwork)
library(here)
library(geofacet)
library(tidybayes)
library(plm)
library(ggplot2)
library(tidyr)
library(dplyr)
library(forcats)


# set cores for mcmc chains
options(mc.cores = parallel::detectCores())

# import survey and census data
mydata <- readRDS(here("B-analysis-data/survey_merged.rds")) %>% 
        mutate(stateyear = gsub("2013", "2014", stateyear))
census <- readRDS(here("B-analysis-data/census.rds"))


# Stage 1: multilevel modelling --------------------------------------------
# 
# # multilevel modelling happens in two stages. first we fit binomial models 
# #       on whether response is larger or zero (vs 0). 
# # we built models in a stepwise fashion seeking to find best model fit with 
# #       leave-one out cross-validation. 


# simple quadratic smoother
fit1.1 <- stan_glmer(response > 0 ~ 1 + year_std + I(year_std^2) +
                             (1 | female:age)+
                             (1 | edu)+
                             (1 | racecat)+
                             (1 | year)+
                             (1 | item) +
                             (1 | stateyear) +
                             (1 | state),
                     data = mydata,
                     family = "binomial",
                     chains=4,
                     seed = 1, iter=2000,
                     control = list(adapt_delta = 0.95))
loo1.1 <- loo(fit1.1)
saveRDS(fit1.1, here("B-analysis-data/models/fit1.1.rds"))


# use rent as smoother
fit1.2 <- stan_glmer(response > 0 ~ 1 + rent +
                             (1 | female:age)+
                             (1 | edu)+
                             (1 | racecat)+
                             (1 | year)+
                             (1 | item) +
                             (1 | stateyear) +
                             (1 | state),
                     data = mydata,
                     family = "binomial",
                     chains=4,
                     seed = 1, iter=2000,
                     control = list(adapt_delta = 0.95))
loo1.2 <- loo(fit1.2)
saveRDS(fit1.2, here("B-analysis-data/models/fit1.2.rds"))

# use presidential vote share (presvote) as smoother
fit1.3 <- stan_glmer(response > 0 ~ 1 + demshare +
                             (1 | female:age)+
                             (1 | edu)+
                             (1 | racecat)+
                             (1 | year)+
                             (1 | item) +
                             (1 | stateyear) +
                             (1 | state),
                     data = mydata,
                     family = "binomial",
                     chains=4,
                     seed = 1, iter=2000,
                     control = list(adapt_delta = 0.95))
loo1.3 <- loo(fit1.3)
saveRDS(fit1.3, here("B-analysis-data/models/fit1.3.rds"))

# use both presvote and rent as smoother
fit1.4 <- stan_glmer(response > 0 ~ 1 + demshare + rent +
                             (1 | female:age)+
                             (1 | edu)+
                             (1 | racecat)+
                             (1 | year)+
                             (1 | item) +
                             (1 | stateyear) +
                             (1 | state),
                     data = mydata,
                     family = "binomial",
                     chains=4,
                     seed = 1, iter=2000,
                     control = list(adapt_delta = 0.95))
loo1.4 <- loo(fit1.4)
saveRDS(fit1.4, here("B-analysis-data/models/fit1.4.rds"))

# find best fitting model.
loo_compare(loo1.1, loo1.2, loo1.3, loo1.4)



# part 2 - conditional on having non-zero pref. what MinWage people want? #### #

# simple quadratic smoother
fit2.1 <- stan_lmer(response ~ 1 + year_std + I(year_std^2) +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state),
                    data = mydata,  
                    subset = response > 0,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.95))
loo2.1 <- loo(fit2.1)
saveRDS(fit2.1, here("B-analysis-data/models/fit2.1.rds"))


# use rent as smoother
fit2.2 <- stan_lmer(response ~ 1 + rent +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state),
                    data = mydata,  
                    subset = response > 0,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.95))
loo2.2 <- loo(fit2.2)
saveRDS(fit2.2, here("B-analysis-data/models/fit2.2.rds"))

# use presvote as smoother
fit2.3 <- stan_lmer(response ~ 1 + demshare +
                            (1 | female:age)+
                            (1 | edu)+
                             (1 | racecat)+
                             (1 | year)+
                             (1 | item) +
                             (1 | stateyear) +
                             (1 | state),
                     data = mydata,  
                    subset = response > 0,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.95))
loo2.3 <- loo(fit2.3)
saveRDS(fit2.3, here("B-analysis-data/models/fit2.3.rds"))

# use both presvote and rent as smoother
fit2.4 <- stan_lmer(response ~ 1 + demshare + rent +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state),
                     data = mydata,  
                    subset = response > 0,

                     chains=4,
                     seed = 1, iter=2000,
                     control = list(adapt_delta = 0.95))
loo2.4 <- loo(fit2.4)
saveRDS(fit2.4, here("B-analysis-data/models/fit2.4.rds"))


# what if we throw in state:item interactions?
fit2.5 <- stan_lmer(response ~ 1 + demshare + rent +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state) +
                            (1 | state:item),
                    data = mydata,  subset = response > 0,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.95))
loo2.5 <- loo(fit2.5)
saveRDS(fit2.5, here("B-analysis-data/models/fit2.5.rds"))


# what if we throw in state:item interactions?
fit2.6 <- stan_lmer(response ~ 1 + demshare +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state) +
                            (1 | state:item),
                    data = mydata,  subset = response > 0,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.99))
loo2.6 <- loo(fit2.6)
saveRDS(fit2.6, here("B-analysis-data/models/fit2.6.rds"))

# find best fitting model.
loo_compare(loo2.1, loo2.2, loo2.3, loo2.4, loo2.5, loo2.6)

set.seed(13012)


# running all bayesian MLM models takes quite some time. 
#       we exported model outputs to save the trouble. 
myfit_binom1 <- readRDS(here("B-analysis-data/models/fit1.3.rds"))
myfit_lin1 <- readRDS(here("B-analysis-data/models/fit2.3.rds"))


# export random effect estimates for binomial model
ranef_binom1 <-  myfit_binom1 %>%
        spread_draws(b[,group]) %>% 
        median_qi(b, .width = c(.67, .89, .97)) %>% 
        filter(!grepl("state", group)) 

# export fixed effect estimates  
fixef_binom1 <-  myfit_binom1 %>%
        gather_draws(demshare) %>%
        mutate(.value = ifelse(.variable == "rent",
                               .value * sd(mydata$rent),
                               .value * sd(mydata$demshare))) %>%
        group_by(group = .variable) %>% 
        median_qi(b=.value,  .width = c(.67, .89, .97)) %>% print

# combine to get all coefs from binomial model
coefs_binom1 <- bind_rows(ranef_binom1, fixef_binom1)

# take a brief look at the coef if interested
ggplot(coefs_binom1, aes(y = group, x = b, xmin = .lower, xmax = .upper)) +
        geom_vline(aes(xintercept = 0)) +
        geom_pointinterval() +
        theme_minimal()


# repeat for linear model. get random effect estimates.
ranef_lin1 <-  myfit_lin1 %>%
        spread_draws(b[,group]) %>% 
        median_qi(b, .width = c(.67, .89, .97)) %>% 
        filter(!grepl("state", group))

# get fixed effects estimates. 
fixef_lin1 <- myfit_lin1 %>%
        gather_draws(demshare) %>% 
        mutate(.value = ifelse(.variable == "rent",
                               .value * 2 * sd(mydata$rent),
                               .value * 2 * sd(mydata$demshare))) %>%
        group_by(group = .variable) %>% 
        median_qi(b=.value,  .width = c(.67, .89, .97)) %>% print

# combine all coefs from linear model
coefs_lin1 <- bind_rows(ranef_lin1, fixef_lin1)

# quick glance
# ggplot(coefs_lin1, aes(y = group, x = b, xmin = .lower, xmax = .upper)) +
#         geom_vline(aes(xintercept = 0)) +
#         geom_pointinterval() +
#         theme_minimal()



# combine all coefs in a neat dataframe
coefs <- bind_rows("Logistic" = coefs_binom1, 
                   "Linear" = coefs_lin1,
                   .id = "model") %>% 
        mutate(group = fct_relevel(group, "demshare", "edu:nohs", "edu:hs",
                                   "edu:somecol", "edu:col", "edu:postgrad"),
               model = fct_relevel(model, "Logistic")) %>% 
        filter(.width != .97)

# ~ Figure OA1 ------------------------------------------------------
# This is reported as Figure OA1. in the online appendix.
ggplot(coefs, aes(y = group, x = b, xmin = .lower, xmax = .upper, 
                  color = model)) +
        geom_vline(aes(xintercept = 0)) +
        geom_pointinterval(position = ggstance::position_dodgev(.7)) +
        facet_grid(~ model, scales= "free") + 
        scale_color_discrete(guide = NULL) +
        xlab("Coefficient estimates and credible intervals") + 
        ylab("") + 
        theme_minimal()

# export
ggsave(here("D-documents/figures/MLM_coefs_nostate.pdf"), width = 7, height = 5)

# Stage 2 : post-stratification -------------------------------------------


# run a naive MLM regression without accounting for excess 0s. 
data_bi <- mydata %>% 
        select(response, demshare, female, edu, racecat, age, year, item,
               stateyear, state) %>% 
        na.omit() %>% 
        glimpse

# investigate the posterior predictive checks for both models. 
        # this needs a bit of tweaking as we are subsetting the data. 
bayesplot::pp_check(object = as.numeric(data_bi$response > 0), 
                    yrep = posterior_predict(myfit_binom1, draw = 50), 
                    ppc_bars)

bayesplot::pp_check(object = data_bi$response[data_bi$response > 0], 
                    yrep = posterior_predict(myfit_lin1,
                                             draw = 50), 
                    ppc_dens_overlay)


post.strat <- function(mybinom, mylin){
        
        # first generate posterior predictions from logistic regression 
        #       note that we want a transformed prediction.
        #       this tells us the proportion of respondents in each cell who want to have 
        #       a state minimum wage. 
        
        binom_postdraw <- rstanarm::posterior_epred(mybinom, newdata=census, draw=4000)
        # mean(colMeans(binom1_postdraw))
        cat("Binom post-draws done\n")
        
        # and the linear part. this tells us the minwage people in each cell want on avg.
        #       conditional that it's not 0. 
        lin_postdraw <- posterior_linpred(mylin, newdata=census, draw=4000, 
                                           transform = FALSE)
        cat("Linear post-draws done\n")
        # mean(colMeans(lin1_postdraw))
        
        # combine the cell level predictions.  a simple multiplication should work:
        #       we are predicting 0 for 1-p respondents. for the rest, we take what the 
        #       second model says.
        comb_stratified <- (census$freq/census$statefreq * t(binom_postdraw) * 
                                    t(lin_postdraw) ) %>% 
                apply(., 2, function(x) tapply(x, census$stateyear, sum))
        # write.csv(comb1_stratified, "B-analysis-data/statepred-draws.csv")
        cat("Post-strat done\n")
        comb_stratified
}



my.agg <- function(comb_stratified){
        
        # ~aggregate with Highest density continuous intervals (HDCI) ----------------
        stateyear <- sort(unique(mydata$stateyear))
        
        # loop through the posterior draws from the model
        # and calculate HDCI 
        comb_hdci <- do.call(rbind, apply(X = comb_stratified, 
                                           MARGIN = 1, 
                                           FUN = median_hdci, .width = .9, 
                                           .simple_names = T)) %>% 
                mutate(stateyear = stateyear) %>% 
                separate(stateyear, c("statename", "year"), -4, remove = F) %>% 
                mutate(statename = trimws(statename), 
                       year = as.double(year)) %>% 
                select(-.point, -.interval) %>% 
                rename(median = .value, 
                       state_year = stateyear)
        
        comb_hdci
}

# running all bayesian MLM models takes quite some time. 
#       we exported model outputs to save the trouble. 
myfit_binom_base <- readRDS(here("B-analysis-data/models/fit1.1.rds"))
myfit_lin_base<- readRDS(here("B-analysis-data/models/fit2.1.rds"))


comb1_stratified <- post.strat(myfit_binom1, myfit_lin1)
comb1_hdci <- my.agg(comb1_stratified)

comb2_stratified <- post.strat(myfit_binom_base, myfit_lin_base)
comb2_hdci <- my.agg(comb2_stratified)


# write.csv(comb1_stratified, "B-analysis-data/statepred-draws.csv")
# comb1_stratified <- read.csv("B-analysis-data/statepred-draws.rds")

# inspect mean and SD of estimates
# mean(comb1_hdci$.upper - comb1_hdci$.lower)
# sd(comb1_hdci$.upper - comb1_hdci$.lower)

# export state level minimum wage preference predictions
write.csv(comb1_hdci, here("B-analysis-data/state_prefs.csv"))

# import state preference estimates if this is where you are joining
        # the party
comb1_hdci <- read.csv( here("B-analysis-data/state_prefs.csv"))



# this is a visualization of the results we didn't end up including in the paper, 
#       but i quite like.
comb1_stratified %>% 
        t() %>% 
        as_tibble() %>% 
        pivot_longer(everything(), names_to = "stateyear", values_to = "draws") %>% 
        separate(stateyear, c("state", "year"), -4, remove = F) %>% 
        mutate(state = trimws(state), 
               state = fct_reorder(state, draws, .desc = T)) %>% 
        
ggplot(., aes(y = state, x = draws)) +
        geom_density_ridges() +
        facet_grid(~year) + 
        ylab("") + xlab("Posterior Distributions of State Minimum Wage Preferences") + 
        theme_minimal()
# ggsave("D-documents/figures/minwage-distribution.jpg",
# width = 8, height = 8)


# Post-stratify for national average --------------------------------------

# calculate national proportion of a given census cell, by dividing with natfreq 
national <- (census$freq/census$natfreq * t(binom1_postdraw) * t(lin1_postdraw)) %>% 
        apply(., 2, function(x) tapply(x, census$year, sum))

# and get the HDCI
national1_hdci <- do.call(rbind, apply(X = national, 
                                      MARGIN = 1, 
                                      FUN = median_hdci, .width = c(.67, .89, .97),
                                      .simple_names = T)) %>% 
        pivot_longer(c(.lower, .upper), names_to = "CI", values_to = "value") %>% 
        unite("CI",  c(CI, .width)) %>% 
        pivot_wider(names_from = CI, values_from = value) %>% 
        mutate(year = c(2014, 2016, 2019, 2021)) %>%
        glimpse()

# export national level preference estimates 
write.csv(national1_hdci, here("B-analysis-data/national_pref.csv"))


# Descriptives reported in the text ---------------------------------------------------------

# and import if you don't want to run all of the above. 
national1_hdci <- read.csv( here("B-analysis-data/national_pref.csv"))

# calculate the population fore
state_pops <- census %>% 
        filter(!duplicated(stateyear)) %>% 
        select(state, year, statefreq)

# import our own estimates on minimum wage preferences
#   (to avoid rerunning MLM models and post-stratification each time)
prefs <- read.csv(here("B-analysis-data/state_prefs.csv")) %>% 
        select(-.width) 

# import data on minimum wage for US states by year
pols <- rio::import(here("A-original-data/policies.dta")) %>% 
        filter(state != "Puerto Rico") %>% 
        mutate(state_lower = tolower(state))

# import abbreviations for us states
abbs <- read.csv(here("E-metadata/abbs.csv"))


# merge all data into tidy dataframe.
df <- left_join(prefs, pols, by =c("statename" = "state", "year" = "year")) %>% 
        left_join(abbs, by = c("statename" = "state")) %>% 
        mutate(bias = wage - median,
               bias.lower = wage - .lower, 
               bias.upper = wage - .upper,
               abs_bias = abs(bias), 
               abs_bias.lower = abs(bias.lower), 
               abs_bias.upper = abs(bias.upper)) %>% 
        arrange(statename, year)

# median preferences by year 
df %>% 
        group_by(year) %>% 
        summarise(median = median(median),
                  median.lower = median(.lower),
                  median.upper = median(.upper))


# Get posterior distributions for changes in MW prefs 2014-21

# get rid of years 16-19 for each state 
df_diffs_1 <- comb1_stratified[c(T,F,F,T),]
# then calculate the diff between each odd and even row. 
df_diffs <- df_diffs_1[c(F, T), ] -  df_diffs_1[c(T, F), ] 

# which should result in 4000 posterior draws from the 14-21 difference
# dim(df_diffs)

# ... which we then can aggregate 
pref_change2 <- do.call(rbind, apply(X = df_diffs, 
                                     MARGIN = 1, 
                                     FUN = median_hdci, .width = .9, 
                                     .simple_names = T)) %>% 
        mutate(stateyear = sort(unique(mydata$state))) %>% 
        rename(change = .value) %>% 
        arrange(change)

head(pref_change2)
tail(pref_change2)

# round(mean(pref_change2$change), 1)
# sd(pref_change2$change)

# median policy
df %>% 
        group_by(year) %>% 
        summarise(median = median(wage))           
                  

# largest smallest change across 7 years
pol_change <- df %>% 
        select(statename, year, wage) %>% 
        pivot_wider(names_from = year, values_from = wage) %>% 
        mutate(change = `2021` - `2014`) %>% 
        arrange(desc(change))
head(pol_change)
tail(pol_change)

mean(pol_change$change)
sd(pol_change$change)
sum(pol_change$change == 0)

# somechange <- pol_change$statename[pol_change$change!=0]

# ~ Figure 2. aka "Wormplot" ----------------------------------------------------------------
# This figure visualizes our state-year level data 

my_grid <- us_state_grid1 %>% 
        mutate(name_labels = fct_recode(name, 
                                        `New Hamp.` = "New Hampshire",
                                        Massachus. = "Massachusetts", 
                                        `N. Dakota` = "North Dakota", 
                                        `S. Dakota` = "South Dakota", 
                                        `N. Carolina` = "North Carolina", 
                                        `S. Carolina` = "South Carolina", 
                                        DC = "District of Columbia"))

ggplot(df, aes(x = median, y = wage, color = abs_bias)) +
        geom_abline(aes(intercept = 0, slope = 1), size = 0.4) +
        geom_point(aes(shape = factor(year))) +
        geom_errorbarh(aes(xmin = .lower, xmax =.upper), height = 0) +
        geom_path()+
        facet_geo(~ statename, grid = my_grid, label = "name_labels")+
        xlab("Average preference") +
        ylab("Minimum wage") +
        scale_colour_gradient(low = "grey", high = "black", guide = NULL)+
        scale_x_continuous(n.breaks = 5) + 
        scale_y_continuous(n.breaks = 5) + 
        scale_shape_discrete(name = NULL) +
        theme_minimal() +
        theme(legend.position = "bottom")

ggsave(here("D-documents/figures/mw-trajectory-bw-shapes.pdf"), 
       width = 9.1, height = 7.5)



# Static responsiveness ---------------------------------------------------

# ~ Figure 1. in the main text #############
ggplot(df, aes(x = median, y = wage)) +
        geom_smooth(method = "lm", se = T) +
        geom_point(alpha = .5, size = .3) +
        # geom_text(aes(label = abb), check_overlap = T, size = 2, vjust = 'top') + 
        facet_grid(~ year) + 
        geom_abline(slope = 1, intercept = 0) +
        geom_rug(alpha = .5) + 
        xlim(c(6, 16.5)) + 
        ylim(c(6, 16.5)) +
        # ggtitle("A") +
        xlab("Average Minimum Wage Preference ($)") + 
        ylab("State Minimum Wage ($)") + 
        theme_minimal()
ggsave(here("D-documents/figures/static_responsiveness.pdf"), 
       height = 3, width = 8)

# export regression results 
stat.lm <- function(x){
        df %>% 
                filter(year == x) %>% 
                mutate(median = median - median(median)) %>% 
                lm(wage ~ median, .) 
}

stat1 <- stat.lm(2014)
stat2 <- stat.lm(2016)
stat3 <- stat.lm(2019)
stat4 <- stat.lm(2021)

# ~ Table OA3. ###########
stargazer::stargazer(
        stat1,stat2, stat3, stat4,
        type = "text", 
        digits =2, ci.level = .9,
        column.labels = c("2014", "2016", "2019", "2021"),
        model.numbers = F,
        dep.var.labels = "State Minimum Wage",
        covariate.labels = "Minimum Wage Preference",
        omit.stat = c("ser", "rsq", "f"),ci = T,
        # star.cutoffs = c(0.05, 0.01, 0.001), 
        star.cutoffs = NA, 
        notes = "Uncertainty estimates denote 90\\% credible intervals",
        notes.append = FALSE,
        title = "Estimates of static responsiveness",
        label = "tab:static",
        out = here("D-documents/tables/static_resp.tex")
)



# ~ Propagate uncertainty  ---------------------------

stat_fe <- matrix(NA, nrow = ncol(comb1_stratified),
                  ncol= 4)

# for each year, and each posterior draw loop through
#       (takes about 1 min on our server). 
for(i in 1:ncol(comb1_stratified)){
  
  if(i %% 200==0) {
    # Print on the screen some message
    cat(paste0("iteration: ", i, "\n"))
  }
  
  # run the regression
  for(j in 1:4){
    myyear <- c(2014, 2016, 2019, 2021)[j]
    myfit <- stat.lm(myyear) 
    # take coefficient
    hatB <- coef(myfit)[2]
    # and cov matrix
    hatV <- sandwich::vcovHC(myfit)[2,2]
    # and from these two generate a single draw with noise.
    stat_fe[i, j] <- MASS::mvrnorm(n = 1, mu = hatB, Sigma = hatV)
  }
}


# combine estimates into table
static_moc <- bind_cols(
  round(as.double(quantile(stat_fe[, 1], probs = c(0.05, 0.5, 0.95))), 2),
  round(as.double(quantile(stat_fe[, 2], probs = c(0.05, 0.5, 0.95))), 2),
  round(as.double(quantile(stat_fe[, 3], probs = c(0.05, 0.5, 0.95))), 2),
  round(as.double(quantile(stat_fe[, 4], probs = c(0.05, 0.5, 0.95))), 2)
) 

# add labels 
names(static_moc)   <- c("2014", "2016", "2019", "2021")
static_moc$names <- c("5%", "Median", "95%") 

# these values are added manually to the regression table with "naive" 
#   (ie. not propagated) estimates above. 
static_moc

# Relative bias -----------------------------------------------------------

# Descriptives reported in the text

# median bias
med.tab <- df %>% group_by(year) %>% 
        summarise(yearly_median = median(bias),
                  ym.lower = median(bias.lower), 
                  ym.upper = median(bias.upper))
round(med.tab, 1)

# minimum bias
min.tab <- df %>% group_by(year) %>% 
        summarise(min = min(bias), 
                  min.lower = min(bias.lower),
                  min.upper = min(bias.upper))

# maximum bias
max.tab <- df %>% group_by(year) %>% 
        summarise(max = max(bias), 
                  max.lower = max(bias.lower),
                  max.upper = max(bias.upper))

# range of bias
range.tab <- tibble(year =  c(2014, 2016, 2019, 2021),
                     range = max.tab$max - min.tab$min, 
                     range.lower = max.tab$max.lower -min.tab$min.lower, 
                     range.upper = max.tab$max.upper -min.tab$min.upper)

# most liberal state in 2014 (lowest conservative bias) 
df %>% filter(year == 2014 & bias == max(bias[year == 2014])) %>% 
        select(state_lower, bias, bias.lower, bias.upper)

# states with more lib bias than the most lib state in 2014
df %>% filter(year == 2021 & bias > max(bias[year == 2014]))%>% 
        select(abb, bias, bias.lower, bias.upper)

# states with  lib bias in 2021
df %>% filter(year == 2021 & bias > 0)%>% 
        select(state_lower, bias, bias.lower, bias.upper)


# save estimates for bias at the national level
nat_pols <- pols %>%
        mutate(state = tolower(state)) %>%
        left_join(state_pops) %>% 
        filter(year %in% c(2014, 2016, 2019, 2021)) %>% 
        group_by(year) %>% 
        summarise(nat_wage = weighted.mean(wage, statefreq, na.rm = T),
                  median_state = median(wage)) %>% 
        ungroup() %>% 
        left_join(national1_hdci, by = "year") %>% 
        mutate(bias = nat_wage - .value,
               abs_bias = abs(bias), 
               b.lower_0.67 = nat_wage - .lower_0.67,
               b.lower_0.89 = nat_wage - .lower_0.89, 
               b.lower_0.97 = nat_wage - .lower_0.97,
               b.upper_0.67 = nat_wage - .upper_0.67,
               b.upper_0.89 = nat_wage - .upper_0.89, 
               b.upper_0.97 = nat_wage - .upper_0.97, 
               a.lower_0.67 = abs(b.lower_0.67 ),
               a.lower_0.89 = abs(b.lower_0.89 ),
               a.lower_0.97 = abs(b.lower_0.97 ),
               a.upper_0.67 = abs(b.upper_0.67 ),
               a.upper_0.89 = abs(b.upper_0.89 ),
               a.upper_0.97 = abs(b.upper_0.97 )
               ) %>% 
        print


# ~ Figure 3. Change in policy bias by year -------------------------------

ggplot(df, aes(x = year, y = bias)) +
        geom_line(aes(group = statename), size = .1, alpha =.4) + 
        geom_line(data=nat_pols, size = 1.4) +
        geom_errorbar(data=nat_pols, aes(ymin = b.lower_0.67,
                                         ymax = b.upper_0.67),
                      size = 1.8,
                      width = 0) +
        geom_errorbar(data=nat_pols, aes(ymin = b.lower_0.89,
                                         ymax = b.upper_0.89),
                      size = .9,
                      width = 0) +
        geom_errorbar(data=nat_pols, aes(ymin = b.lower_0.97,
                                         ymax = b.upper_0.97),
                      size = .45,
                      width = 0) +
        geom_abline(aes(intercept =0, slope =0), size = 0.2) +
        scale_x_continuous(breaks = c(2014, 2016, 2019, 2021), 
                           # labels = c("14", "16", "19", "21"),
                           limits = c(2013, 2022))+
        xlab("") + ylab("Policy bias ($)") + 
        theme_minimal()
ggsave(here("D-documents/figures/relative_bias_spagetti.pdf"))


# ~ Policy bias & Direct democracy -------------------------------------

# import data on direct democracy for each state 
dd <- rio::import(here("B-analysis-data/state_data_dd.dta"))


# ~ Figure OA9 ------------------------------------------------------------

# plot bias conditional on state level ballot initiatives or no 
df_dd <- left_join(df, dd) %>% 
        filter(abb != "DC") %>% 
        mutate(directdem = ifelse(directdem == 0, "No direct dem.", "Direct dem."), 
               directdem = fct_relevel(directdem, "No direct dem."))

ggplot(df_dd, aes(x = year, y = bias, color = as.factor(directdem))) +
        geom_line(aes(group = statename), size = .2) + 
        geom_abline(aes(intercept =0, slope =0), size = 0.2) +
        scale_x_continuous(breaks = c(2014, 2016, 2019, 2021), 
                           limits = c(2013, 2022))+
        scale_color_viridis_d(guide = NULL)+
        facet_grid(~ directdem) + 
        xlab("") + ylab("Policy bias ($)") + 
        theme_minimal()
ggsave(here("D-documents/figures/directdem_spagetti.pdf"), width = 6, height = 4)


# Results reported in Section C5. of the OA  
df_dd_wide <- df_dd %>% 
        filter(year %in% c(2014, 2021)) %>% 
        select(statename, bias, year, directdem) %>% 
        pivot_wider(names_from = year, values_from = bias) %>% 
        mutate(trend = `2021` - `2014`)

# stargazer(lm(`2014` ~ 0 + directdem, df_dd_wide),
#           lm(`2021` ~ 0 +directdem, df_dd_wide),
#           type = "text", digits =2 )
# stargazer(lm(`2014` ~ 1 + directdem, df_dd_wide),
#           lm(`2021` ~ 1 +directdem, df_dd_wide),
#           type = "text", digits = 2)

tapply(df_dd_wide$`2014`, df_dd_wide$directdem, mean)
# round(diff(tapply(df_dd_wide$`2014`, df_dd_wide$directdem, mean)), 2)

tapply(df_dd_wide$`2021`, df_dd_wide$directdem, mean)
# round(diff(tapply(df_dd_wide$`2021`, df_dd_wide$directdem, mean)), 2)

# calculate changes in  bias in states with and without directdem
stargazer(lm(trend ~  0 + directdem, df_dd_wide),
          type = "text", digits = 2, ci.level = .9)


# ~ Policy bias & Partisanship ------------------------------------------------------------

presvote12 <- readRDS(here("B-analysis-data/presvote2012.rds")) %>% 
        mutate(demcat = case_when(demshare <= 0.47 ~ "Red", 
                                  demshare >= .53 ~ "Blue", 
                                  TRUE ~ "Purple"), 
               demcat = fct_relevel(demcat, "Blue", "Purple"))

quantile(presvote12$demshare, c(.33, .67))
hist(presvote12$demshare)        
count(presvote12, demcat)

# ~ Figure OA10 ------------------------------------------------------------
df_pv <- left_join(df, select(presvote12, -year), by = c("state_lower" = "state")) 
df_pv_wide <- df_pv %>% 
        filter(year %in% c(2014, 2021)) %>% 
        select(statename, bias, year, demcat ,demshare) %>% 
        pivot_wider(names_from = year, values_from = bias) %>% 
        
        mutate(trend = `2021` - `2014`, 
               gap14 =ifelse(`2014` <= median(`2014`), "Above", "Below"))

summary(lm(`2014` ~ 0 + demcat, df_pv_wide))
summary(lm(`2021` ~ 0 + demcat, df_pv_wide))

stargazer(lm(trend ~ 0 + demcat, df_pv_wide), 
          type = "text", digits =2)

df_pv_wide %>% 
        group_by(demcat) %>% 
        summarise(mean = mean(trend), 
                  min = min(trend), 
                  max = max(trend), 
                  range = min - max) 

ggplot(df_pv_wide, aes(x = demshare, y = trend)) +
        geom_point() +
        geom_smooth() +
        xlab("Obama 2012 vote share") +
        ylab("Change in policy bias between 2014 and 2021") + 
        theme_minimal() 
ggsave(here("D-documents/figures/vote_trend.pdf"), height = 5)


# ~ Figure OA11 --------------------------------
ggplot(df_pv, aes(x = year, y = bias, color = demshare)) +
        geom_line(aes(group = statename), size = .2) + 
        geom_abline(aes(intercept =0, slope =0), size = 0.2) +
        scale_x_continuous(breaks = c(2014, 2016, 2019, 2021), 
                           # labels = c("14", "16", "19", "21"),
                           limits = c(2013, 2022))+
        # scale_color_manual(guide = NULL, values = c("blue", "purple", "red"))+
        scale_color_gradientn(colors = c("red", "purple", "blue", "darkblue"), 
                              name = "Obama '12 \nVote Share") +
        facet_grid(~ demcat) + 
        xlab("") + ylab("Policy bias ($)") + 
        theme_minimal()
ggsave(here("D-documents/figures/voteshare_spagetti.pdf"), height = 4, width = 8)


# Additional statistics
cor.test( ~ demshare + trend, df_pv_wide, method = "spearman")
cor.test( ~ demshare + trend, df_pv_wide)



# ~ inflation --------------------

# source: https://data.worldbank.org/indicator/FP.CPI.TOTL.ZG?end=2021&locations=US&start=2014 
infl <- tribble(
        ~year, ~inflation,
        2014, 1.016,
        2015, 1.001,
        2016, 1.013,
        2017, 1.021,
        2018, 1.024,
        2019, 1.018,
        2020, 1.012,
        2021, 1.047
)

infl.multi <- tribble(
        ~year, ~inflation,
        2014, 1, 
        2016, cumprod(infl$inflation[1:2])[2], 
        2019, cumprod(infl$inflation[1:6])[6],
        2021, cumprod(infl$inflation[1:8])[8],
)

med.tab %>% 
        left_join(infl.multi) %>% 
        mutate(across(c(-year, -inflation), function(x) x / inflation, .names = "{.col}_infl"))

nat_pols_infl <- nat_pols %>% 
        left_join(infl.multi) %>% 
        mutate(across(bias:a.upper_0.97, function(x) x / inflation)) %>% 
        glimpse

df %>% 
        left_join(infl.multi) %>% 
        mutate(bias = bias/ inflation) %>% 

# ~ Figure OA2 ------------------------------------------------------------

ggplot( aes(x = year, y = bias)) +
        geom_line(aes(group = statename), size = .1, alpha =.4) + 
        geom_line(data=nat_pols_infl, size = 1.4) +
        geom_errorbar(data=nat_pols_infl, aes(ymin = b.lower_0.67,
                                         ymax = b.upper_0.67),
                      size = 1.8,
                      width = 0) +
        geom_errorbar(data=nat_pols_infl, aes(ymin = b.lower_0.89,
                                         ymax = b.upper_0.89),
                      size = .9,
                      width = 0) +
        geom_abline(aes(intercept =0, slope =0), size = 0.2) +
        scale_x_continuous(breaks = c(2014, 2016, 2019, 2021), 
                           # labels = c("14", "16", "19", "21"),
                           limits = c(2013, 2022))+
        xlab("") + ylab("Policy bias (in 2014$)") + 
        theme_minimal()
ggsave(here("D-documents/figures/relative_bias_spagetti_infl.pdf"))

# Dynamic responsiveness --------------------------------------------------

# "naive" Fixed effects model
fe_df <- pdata.frame(df, index=c("statename","year"), drop.index=TRUE, row.names=TRUE)
femodel2 <- plm(wage~median, 
                data = fe_df, 
                effect="individual", 
                model = "within")
summary(femodel2)


# ~ Propagate uncertainty --------------------


dyn_fe_df2 <- as_tibble(comb1_stratified) %>%
        mutate(stateyear = rownames(comb1_stratified)) %>%
        separate(stateyear, c("state", "year"),  -4) %>%
        mutate( year = as.numeric(year),
                state = trimws(state)) %>%
        left_join(pols) %>%
        pdata.frame(index=c("state"), drop.index=F, row.names=TRUE)


stopifnot(identical(sort(tolower(unique(dyn_fe_df2$state))),
                    sort(tolower(unique(pols$state)))))


dyn_fe2 <- NA
for(i in 1:ncol(comb1_stratified)){
        # for(i in 1:100){
        
        if(i %% 200==0) {
                # Print on the screen some message
                cat(paste0("iteration: ", i, "\n"))
        }
        prop_femodel <- plm(wage ~ dyn_fe_df2[, i],
                            data = dyn_fe_df2,
                            effect="individual",
                            model = "within")
        hatB_s <- coef(prop_femodel)
        hatV_s <- vcovHC(prop_femodel, method = c("arellano"), type = "HC1",
                         cluster = "group")
        dyn_fe2[i]<-MASS::mvrnorm (n = 1 , mu = hatB_s , Sigma = hatV_s )
        
}

round(quantile(dyn_fe2, probs = c(0.05, 0.5,.95)), 2)


# Validate w/ rent data -----------------------------------------------

# import data on average rent prices in state years. 
rent <- rio::import(here("B-analysis-data/final_rentvote.rds")) %>% 
        dplyr::select(-year_month, -month) %>% 
        ungroup() %>% 
        mutate(rent_z = arm::rescale(rent),
               demshare_z = arm::rescale(demshare)) %>% 
        glimpse()

# merge with MW prefs 
dfrent.main <- comb1_hdci %>%
        mutate(state_lower = tolower(statename)) %>% 
        ungroup() %>% 
        left_join(rent, by = c("state_lower" = "state", "year" = "year")) %>% 
        glimpse()

dfrent.baseline <- comb2_hdci %>%
        mutate(state_lower = tolower(statename)) %>% 
        ungroup() %>% 
        left_join(rent, by = c("state_lower" = "state", "year" = "year")) %>% 
        glimpse()

dfrent <- bind_rows(list("Main model" = comb1_hdci, 
                         "Baseline model" = comb2_hdci), 
                    .id = "Model") %>% 
        mutate(state_lower = tolower(statename)) %>% 
        ungroup() %>% 
        left_join(rent, by = c("state_lower" = "state", "year" = "year")) %>% 
        glimpse()

# ~ Figure OA4. -----------------------------------------------------------

ggplot(dfrent.main, aes( x = rent, y = median)) +
        geom_point() + 
        geom_smooth() + 
        ylab("Minimum wage preference") + 
        xlab("Rent") + 
        facet_grid(.~year) + 
        scale_x_continuous(n.breaks = 4) + 
        theme_bw()

ggsave(here("D-documents/figures/validate_mw_rent.pdf"))


# Run a fixed effects regression showing how changes in rents 
#   within a state correlate with change in min wage pref.
fit.rent.main <- lfe::felm(median ~ I(rent / 80) | statename, dfrent.main)
fit.rent.baseline <- lfe::felm(median ~ I(rent / 80) | statename, dfrent.baseline)

stargazer::stargazer( fit.rent.baseline, fit.rent.main, 
                     covariate.labels = "Rent", label = "tab:rent",
                     title = "Regression of minimum wage preferences on rent prices across US states",
                     add.lines=list(c('Fixed effects for State', 'Yes', 'Yes')),
                     column.labels = c("Baseline", "Main"),
                     dep.var.labels = "Mean MW preference",
                     omit.stat = "rsq", 
                     digits = 2, 
                     star.cutoffs = c(0.05, 0.01, 0.001),
                     out = here("D-documents/tables/rent_validity.tex"),
                     type = "text")



# Validate w/ CCES data -------------------------------------

mw <- read.csv("B-analysis-data/cces_estimates.csv")

df.cces.main <- comb1_hdci %>% 
        left_join(mw,  by = c("statename" = "state", "year")) %>% 
        filter( year != 2014) %>% 
        glimpse

df.cces.baseline <- comb2_hdci %>% 
        left_join(mw,  by = c("statename" = "state", "year")) %>% 
        filter( year != 2014) %>% 
        glimpse

df.cces <- bind_rows(list("Main model" = comb1_hdci, 
                          "Baseline model" = comb2_hdci), 
                     .id = "Model") %>% 
        left_join(mw,  by = c("statename" = "state", "year")) %>% 
        filter( year != 2014) %>% 
        glimpse

# ~ Figure OA5 ------------------------------------------------------------

ggplot(df.cces.main, aes(x = minwage, y = median)) + 
        geom_point() + 
        geom_smooth() + 
        ylab("MRP estimate of local MW pref") + 
        xlab("Average MW pref (CCES)") +
        facet_grid(.~year) + 
        theme_bw() 
ggsave("D-documents/figures/cces_valid.pdf", width =  7, height = 3)

fit.cces.main <- lfe::felm(median ~ I(scale(minwage)) | year, df.cces.main)
fit.cces.baseline <- lfe::felm(median ~ I(scale(minwage)) | year, df.cces.baseline)

#   within a state correlate with change in min wage pref.
stargazer::stargazer(fit.cces.baseline, fit.cces.main,
                     covariate.labels = "CCES minimum wage",
                     column.labels = c("Baseline", "Main"),
                     label = "tab:cces",
                     add.lines=list(c("Fixed effects for year", "Yes", "Yes")),
                     title = "Regression of minimum wage preferences on CCES estimates",
                     dep.var.labels = "Mean MW preference",
                     omit.stat = "rsq",
                     digits = 2,
                     star.cutoffs = c(0.05, 0.01, 0.001),
                     out = here("D-documents/tables/cces_validity.tex"),
                     type = "text")


# Bimodal -----------------------------------------------------------------

# run a naive MLM regression without accounting for excess 0s. 
data_bi <- mydata %>% 
        select(response, demshare, female, edu, racecat, age, year, item,
               stateyear, state) %>% 
        na.omit() %>% 
        glimpse

fit4.0 <- stan_lmer(response ~ 1 + demshare +
                            (1 | female:age)+
                            (1 | edu)+
                            (1 | racecat)+
                            (1 | year)+
                            (1 | item) +
                            (1 | stateyear) +
                            (1 | state),
                    data = data_bi,
                    chains=4,
                    seed = 1, iter=2000,
                    control = list(adapt_delta = 0.95))
saveRDS(fit4.0, here("B-analysis-data/models/fit4.0.rds"))

fit4.0 <- readRDS(here("B-analysis-data/models/fit4.0.rds"))

fit4_post <- rstanarm::posterior_epred(fit4.0, newdata=census, draw=4000)

plot_naive <- bayesplot::pp_check(object = data_bi$response, 
                    yrep = posterior_predict(fit4.0, draw = 50), 
                    ppc_dens_overlay)


# ~ Figure OA6. -----------------------------------------------------------

# top panel: the distribution of the raw data
p_outcome <- ggplot(data_bi, aes(x = response)) + 
        geom_density(color= "darkblue", size = 1)+
        # geom_histogram(fill= "darkblue", size = 1)+
        xlab("") + 
        xlim(-12, 50) + 
        ylab("Minimum wage prefs (Y)") + 
        theme_bw()

# middle panel: distribution of naive posterior predictions
naive_post <- posterior_predict(fit4.0, draw = 50) %>% 
        t() %>% 
        as_tibble() %>% 
        pivot_longer(everything()) %>% 
        glimpse()

p_naive <- ggplot(naive_post, aes(x = value, group = name)) + 
        geom_density(color= "lightblue3", size = .04) +
        xlim(-12, 50) + 
        ylab("Naive model posterior (Y rep)") +  xlab("") + 
        theme_bw()

# bottom panel: combined model
combined_posteriors <- posterior_predict(myfit_binom1, newdata = data_bi, draw = 50) *
        posterior_predict(myfit_lin1, newdata = data_bi, draw = 50)


cool_post <- combined_posteriors%>% 
        t() %>%
        as_tibble() %>%
        pivot_longer(everything()) %>%
        glimpse()


p_cool <- ggplot(cool_post, aes(x = value, group = name)) + 
        geom_density(color= "lightblue3",size = .04) +
        # geom_histogram(fill= "lightblue3") +
        xlim(-12, 50) + 
        ylab("Bimodal model posterior (Y rep)") +  xlab("") + 
        theme_bw()

p_outcome / p_naive / p_cool
ggsave(here("D-documents/figures/bimodal_ppchecks.pdf"), 
       height = 7, width = 5)



# combine the cell level predictions. again a simple multiplication should work
#       we are predicting 0 for 1-p respondents. for the rest, we take what the 
#       second model says.
fit4_weighted <- (census$freq/census$statefreq * t(fit4_post) ) %>% 
        apply(., 2, function(x) tapply(x, census$stateyear, sum))


fit4_hdci <- do.call(rbind, apply(X = fit4_weighted, 
                                  MARGIN = 1, 
                                  FUN = median_hdci, .width = .9, 
                                  .simple_names = T)) %>% 
        mutate(stateyear = stateyear) %>% 
        separate(stateyear, c("statename", "year"), -4, remove = F) %>% 
        mutate(statename = trimws(statename), 
               year = as.double(year)) %>% 
        rename(median = .value, 
               state_year = stateyear)


all_preds <- bind_rows(list("naive" = fit4_hdci, 
                            "bimodal" = comb1_hdci), 
                       .id = "version") %>% 
        mutate(credible_int = .upper - .lower) %>% 
        glimpse


# ~ Figure OA7. -------------------------------------------------------------

ggplot(all_preds,  aes(x = fct_reorder(statename, median),
                       y = median, color = version)) + 
        geom_point(position = position_dodge(0.5)) +
        geom_errorbar(aes(ymin = .lower, ymax = .upper), 
                      position = position_dodge(0.5),
                      width = 0) +
        coord_flip()+ 
        facet_grid(. ~ year) + 
        xlab("") + ylab("Minimum wage preference") +
        # ggtitle("MRP estimates of state minimum wage prefs") + 
        theme_minimal()

ggsave(here("D-documents/figures/old_new_compared.pdf"), 
       height = 7, width = 6)


# ~ Figure OA8. -----------------------------------------------------------

# reshape data for plotting MW estimates in naive (unimodal) vs main model
p_meanestimates <- all_preds %>% 
        dplyr::select(version, median, state_year) %>% 
        pivot_wider(names_from = version, values_from = median) %>% 
        
ggplot(., aes(y = bimodal, x = naive)) + 
        geom_point(alpha = .5) + 
        geom_abline(slope = 1, intercept = 0) +
        geom_smooth( method = "lm", se = F) +
        xlab("Naive model") + ylab("Bimodal model") +
        ggtitle("Mean predictions") +
        theme_bw()

# reshape data for plotting uncertainty estimates in naive (unimodal) vs main model
p_credibleint <- all_preds %>% 
        dplyr::select(version, credible_int, state_year) %>% 
        pivot_wider(names_from = version, values_from = credible_int) %>% 
        
        ggplot(., aes(y = bimodal, x = naive)) + 
        geom_point(alpha = .5) + 
        geom_abline(slope = 1, intercept = 0) +
        geom_smooth( method = "lm", se = F) + 
        xlim(0.5, 2.75) + ylim(0.5, 2.75) +
        xlab("Naive model") + ylab("Bimodal model") +
        ggtitle("Uncertainty of predictions") +
        theme_bw()

# combine plots and save
p_meanestimates + p_credibleint + plot_annotation(tag_levels = 'A')
ggsave(here("D-documents/figures/old_new_xyplot.pdf"), 
       width = 7, height = 3.8)

# average (and min max) difference between two estimates 
all_preds %>% 
        dplyr::select(version, median, state_year) %>% 
        pivot_wider(names_from = version, values_from = median) %>% 
        mutate(bias = bimodal - naive) %>% 
        summarize(mean = mean(bias), 
                  max = max(bias), 
                  min = min(bias))


# average (and min max) proportion diff between two estimates.
all_preds %>% 
        dplyr::select(version, credible_int, state_year) %>% 
        pivot_wider(names_from = version, values_from = credible_int) %>% 
        mutate(bias = bimodal / naive) %>% 
        summarize(mean = mean(bias), 
                  max = max(bias), 
                  min = min(bias))


# Baseline vs Main  -------------------------------------------------------

# this section compares our baseline model (with simple quadratic time trend)
# and the main model, which uses election data as a dynamic smoother 
# (ie predictor of time trends in state MW prefs).

# trim estimates from main model
lean_main <- comb1_hdci %>% 
        transmute(main = median, statename, year)
# combine with baseline model
comp_preds <- comb2_hdci %>% 
        transmute(baseline = median, statename, year) %>% 
        left_join(lean_main) %>% 
        mutate(state = tolower(statename)) %>% 
        # merge with election results 
        left_join(select(presvote12, -year))

# ~ Figure OA3 ------------------------------------------------------------

ggplot(comp_preds, aes(x = baseline, y = main, color = demshare)) + 
        geom_point() +
        geom_abline(aes(intercept = 0, slope = 1)) + 
        facet_wrap(~year) + 
        xlab("Baseline model predictions") + 
        ylab("Main model predictions") + 
        xlim(c(8, 15)) + ylim(c(8, 15)) + 
        scale_color_gradientn(colors = c("red", "purple", "blue", "darkblue"), 
                              name = "Obama '12 \nVote Share") +
        theme_bw()
ggsave("D-documents/figures/baseline_v_main.pdf", width = 5, height = 4)        


# Some additional models comparing both baseline and main models.
#       to naive mean estimates based on raw survey data. 
naive_preds <- mydata %>% 
        group_by(state, year) %>% 
        summarise(naive = mean(response, na.rm = T)) %>% 
        ungroup()

# combine all 3 estimates into a single DF
comp_preds2 <- comp_preds %>% 
        left_join(naive_preds)

# naive vs baseline
ggplot(comp_preds2, aes(x = naive, y = baseline, color = demshare)) + 
        geom_point() +
        geom_abline(aes(intercept = 0, slope = 1)) + 
        facet_wrap(~year) + 
        xlab("Naive means") +
        ylab("Baseline model predictions") +
        xlim(c(8, 15)) + ylim(c(8, 15)) + 
        scale_color_gradientn(colors = c("red", "purple", "blue", "darkblue"), 
                              guide = NULL) +
        theme_bw() + 

# naive vs main 
ggplot(comp_preds2, aes(x = naive, y = main, color = demshare)) + 
        geom_point() +
        geom_abline(aes(intercept = 0, slope = 1)) + 
        facet_wrap(~year) + 
        xlab("Naive means") +
        ylab("Main model predictions") +
        xlim(c(8, 15)) + ylim(c(8, 15)) + 
        scale_color_gradientn(colors = c("red", "purple", "blue", "darkblue"), 
                              name = "Obama '12 \nVote Share") +
        theme_bw()
